data(Prestige)

#add column name to first column:
Prestige <- cbind(as_tibble(rownames(Prestige)), Prestige) %>% 
  rename(job = value) #can't wrap in as_tibble to drop row names as this messes up the labeling on Avplots 
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
Prestige %>% 
  kable() %>%
  kable_styling()
job education income women prestige census type
gov.administrators gov.administrators 13.11 12351 11.16 68.8 1113 prof
general.managers general.managers 12.26 25879 4.02 69.1 1130 prof
accountants accountants 12.77 9271 15.70 63.4 1171 prof
purchasing.officers purchasing.officers 11.42 8865 9.11 56.8 1175 prof
chemists chemists 14.62 8403 11.68 73.5 2111 prof
physicists physicists 15.64 11030 5.13 77.6 2113 prof
biologists biologists 15.09 8258 25.65 72.6 2133 prof
architects architects 15.44 14163 2.69 78.1 2141 prof
civil.engineers civil.engineers 14.52 11377 1.03 73.1 2143 prof
mining.engineers mining.engineers 14.64 11023 0.94 68.8 2153 prof
surveyors surveyors 12.39 5902 1.91 62.0 2161 prof
draughtsmen draughtsmen 12.30 7059 7.83 60.0 2163 prof
computer.programers computer.programers 13.83 8425 15.33 53.8 2183 prof
economists economists 14.44 8049 57.31 62.2 2311 prof
psychologists psychologists 14.36 7405 48.28 74.9 2315 prof
social.workers social.workers 14.21 6336 54.77 55.1 2331 prof
lawyers lawyers 15.77 19263 5.13 82.3 2343 prof
librarians librarians 14.15 6112 77.10 58.1 2351 prof
vocational.counsellors vocational.counsellors 15.22 9593 34.89 58.3 2391 prof
ministers ministers 14.50 4686 4.14 72.8 2511 prof
university.teachers university.teachers 15.97 12480 19.59 84.6 2711 prof
primary.school.teachers primary.school.teachers 13.62 5648 83.78 59.6 2731 prof
secondary.school.teachers secondary.school.teachers 15.08 8034 46.80 66.1 2733 prof
physicians physicians 15.96 25308 10.56 87.2 3111 prof
veterinarians veterinarians 15.94 14558 4.32 66.7 3115 prof
osteopaths.chiropractors osteopaths.chiropractors 14.71 17498 6.91 68.4 3117 prof
nurses nurses 12.46 4614 96.12 64.7 3131 prof
nursing.aides nursing.aides 9.45 3485 76.14 34.9 3135 bc
physio.therapsts physio.therapsts 13.62 5092 82.66 72.1 3137 prof
pharmacists pharmacists 15.21 10432 24.71 69.3 3151 prof
medical.technicians medical.technicians 12.79 5180 76.04 67.5 3156 wc
commercial.artists commercial.artists 11.09 6197 21.03 57.2 3314 prof
radio.tv.announcers radio.tv.announcers 12.71 7562 11.15 57.6 3337 wc
athletes athletes 11.44 8206 8.13 54.1 3373 NA
secretaries secretaries 11.59 4036 97.51 46.0 4111 wc
typists typists 11.49 3148 95.97 41.9 4113 wc
bookkeepers bookkeepers 11.32 4348 68.24 49.4 4131 wc
tellers.cashiers tellers.cashiers 10.64 2448 91.76 42.3 4133 wc
computer.operators computer.operators 11.36 4330 75.92 47.7 4143 wc
shipping.clerks shipping.clerks 9.17 4761 11.37 30.9 4153 wc
file.clerks file.clerks 12.09 3016 83.19 32.7 4161 wc
receptionsts receptionsts 11.04 2901 92.86 38.7 4171 wc
mail.carriers mail.carriers 9.22 5511 7.62 36.1 4172 wc
postal.clerks postal.clerks 10.07 3739 52.27 37.2 4173 wc
telephone.operators telephone.operators 10.51 3161 96.14 38.1 4175 wc
collectors collectors 11.20 4741 47.06 29.4 4191 wc
claim.adjustors claim.adjustors 11.13 5052 56.10 51.1 4192 wc
travel.clerks travel.clerks 11.43 6259 39.17 35.7 4193 wc
office.clerks office.clerks 11.00 4075 63.23 35.6 4197 wc
sales.supervisors sales.supervisors 9.84 7482 17.04 41.5 5130 wc
commercial.travellers commercial.travellers 11.13 8780 3.16 40.2 5133 wc
sales.clerks sales.clerks 10.05 2594 67.82 26.5 5137 wc
newsboys newsboys 9.62 918 7.00 14.8 5143 NA
service.station.attendant service.station.attendant 9.93 2370 3.69 23.3 5145 bc
insurance.agents insurance.agents 11.60 8131 13.09 47.3 5171 wc
real.estate.salesmen real.estate.salesmen 11.09 6992 24.44 47.1 5172 wc
buyers buyers 11.03 7956 23.88 51.1 5191 wc
firefighters firefighters 9.47 8895 0.00 43.5 6111 bc
policemen policemen 10.93 8891 1.65 51.6 6112 bc
cooks cooks 7.74 3116 52.00 29.7 6121 bc
bartenders bartenders 8.50 3930 15.51 20.2 6123 bc
funeral.directors funeral.directors 10.57 7869 6.01 54.9 6141 bc
babysitters babysitters 9.46 611 96.53 25.9 6147 NA
launderers launderers 7.33 3000 69.31 20.8 6162 bc
janitors janitors 7.11 3472 33.57 17.3 6191 bc
elevator.operators elevator.operators 7.58 3582 30.08 20.1 6193 bc
farmers farmers 6.84 3643 3.60 44.1 7112 NA
farm.workers farm.workers 8.60 1656 27.75 21.5 7182 bc
rotary.well.drillers rotary.well.drillers 8.88 6860 0.00 35.3 7711 bc
bakers bakers 7.54 4199 33.30 38.9 8213 bc
slaughterers.1 slaughterers.1 7.64 5134 17.26 25.2 8215 bc
slaughterers.2 slaughterers.2 7.64 5134 17.26 34.8 8215 bc
canners canners 7.42 1890 72.24 23.2 8221 bc
textile.weavers textile.weavers 6.69 4443 31.36 33.3 8267 bc
textile.labourers textile.labourers 6.74 3485 39.48 28.8 8278 bc
tool.die.makers tool.die.makers 10.09 8043 1.50 42.5 8311 bc
machinists machinists 8.81 6686 4.28 44.2 8313 bc
sheet.metal.workers sheet.metal.workers 8.40 6565 2.30 35.9 8333 bc
welders welders 7.92 6477 5.17 41.8 8335 bc
auto.workers auto.workers 8.43 5811 13.62 35.9 8513 bc
aircraft.workers aircraft.workers 8.78 6573 5.78 43.7 8515 bc
electronic.workers electronic.workers 8.76 3942 74.54 50.8 8534 bc
radio.tv.repairmen radio.tv.repairmen 10.29 5449 2.92 37.2 8537 bc
sewing.mach.operators sewing.mach.operators 6.38 2847 90.67 28.2 8563 bc
auto.repairmen auto.repairmen 8.10 5795 0.81 38.1 8581 bc
aircraft.repairmen aircraft.repairmen 10.10 7716 0.78 50.3 8582 bc
railway.sectionmen railway.sectionmen 6.67 4696 0.00 27.3 8715 bc
electrical.linemen electrical.linemen 9.05 8316 1.34 40.9 8731 bc
electricians electricians 9.93 7147 0.99 50.2 8733 bc
construction.foremen construction.foremen 8.24 8880 0.65 51.1 8780 bc
carpenters carpenters 6.92 5299 0.56 38.9 8781 bc
masons masons 6.60 5959 0.52 36.2 8782 bc
house.painters house.painters 7.81 4549 2.46 29.9 8785 bc
plumbers plumbers 8.33 6928 0.61 42.9 8791 bc
construction.labourers construction.labourers 7.52 3910 1.09 26.5 8798 bc
pilots pilots 12.27 14032 0.58 66.1 9111 prof
train.engineers train.engineers 8.49 8845 0.00 48.9 9131 bc
bus.drivers bus.drivers 7.58 5562 9.47 35.9 9171 bc
taxi.drivers taxi.drivers 7.93 4224 3.59 25.1 9173 bc
longshoremen longshoremen 8.37 4753 0.00 26.1 9313 bc
typesetters typesetters 10.00 6462 13.58 42.2 9511 bc
bookbinders bookbinders 8.55 3617 70.87 35.2 9517 bc
prestige.mod.2 = lm(prestige ~ education + income + type, data = Prestige)
#all predictors and overall
residualPlots(prestige.mod.2)

##            Test stat Pr(>|Test stat|)   
## education    -0.6836         0.495942   
## income       -2.8865         0.004854 **
## type                                    
## Tukey test   -2.6104         0.009043 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no predictors, only overall
# residualPlots(prestige.mod.2, ~1) 

#only one predictor
# residualPlots(prestige.mod.2, ~education , fitted = F) 

Variation of basic residual plot above:

#Univariate view:
marginalModelPlots(prestige.mod.2)
## Warning in mmps(...): Interactions and/or factors skipped

Added-variable plots (partial-regression plots). Partial relationship adjusted for all other variables in the model (as opposed to univariate above)

avPlots(prestige.mod.2)#, id.n=2)#, id.cex=.6)

#page 295
data(Duncan) #US occupation data
Duncan
##                    type income education prestige
## accountant         prof     62        86       82
## pilot              prof     72        76       83
## architect          prof     75        92       90
## author             prof     55        90       76
## chemist            prof     64        86       90
## minister           prof     21        84       87
## professor          prof     64        93       93
## dentist            prof     80       100       90
## reporter             wc     67        87       52
## engineer           prof     72        86       88
## undertaker         prof     42        74       57
## lawyer             prof     76        98       89
## physician          prof     76        97       97
## welfare.worker     prof     41        84       59
## teacher            prof     48        91       73
## conductor            wc     76        34       38
## contractor         prof     53        45       76
## factory.owner      prof     60        56       81
## store.manager      prof     42        44       45
## banker             prof     78        82       92
## bookkeeper           wc     29        72       39
## mail.carrier         wc     48        55       34
## insurance.agent      wc     55        71       41
## store.clerk          wc     29        50       16
## carpenter            bc     21        23       33
## electrician          bc     47        39       53
## RR.engineer          bc     81        28       67
## machinist            bc     36        32       57
## auto.repairman       bc     22        22       26
## plumber              bc     44        25       29
## gas.stn.attendant    bc     15        29       10
## coal.miner           bc      7         7       15
## streetcar.motorman   bc     42        26       19
## taxi.driver          bc      9        19       10
## truck.driver         bc     21        15       13
## machine.operator     bc     21        20       24
## barber               bc     16        26       20
## bartender            bc     16        28        7
## shoe.shiner          bc      9        17        3
## cook                 bc     14        22       16
## soda.clerk           bc     12        30        6
## watchman             bc     17        25       11
## janitor              bc      7        20        8
## policeman            bc     34        47       41
## waiter               bc      8        32       10
mod.duncan = lm(prestige ~ income + education, data = Duncan)
qqPlot(mod.duncan, id.n=3)

## minister reporter 
##        6        9

p 296

outlierTest(mod.duncan)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
##          rstudent unadjusted p-value Bonferroni p
## minister 3.134519          0.0031772      0.14297

hat-values

suppressWarnings(influenceIndexPlot(mod.duncan , id.n=3))

suppressWarnings(influencePlot(mod.duncan, id.n=3))

##                StudRes        Hat      CookD
## minister     3.1345186 0.17305816 0.56637974
## reporter    -2.3970224 0.05439356 0.09898456
## conductor   -1.7040324 0.19454165 0.22364122
## RR.engineer  0.8089221 0.26908963 0.08096807
mod.duncan.2 = update(mod.duncan, subset = rownames(Duncan) != "minister")
compareCoefs(mod.duncan, mod.duncan.2)
## Calls:
## 1: lm(formula = prestige ~ income + education, data = Duncan)
## 2: lm(formula = prestige ~ income + education, data = Duncan, subset = 
##   rownames(Duncan) != "minister")
## 
##             Model 1 Model 2
## (Intercept)   -6.06   -6.63
## SE             4.27    3.89
##                            
## income        0.599   0.732
## SE            0.120   0.117
##                            
## education    0.5458  0.4330
## SE           0.0983  0.0963
## 
mod.duncan.3 = update(mod.duncan, subset = !(rownames(Duncan) %in% c("conductor","minister")))
compareCoefs(mod.duncan, mod.duncan.2, mod.duncan.3, se=F)
## Calls:
## 1: lm(formula = prestige ~ income + education, data = Duncan)
## 2: lm(formula = prestige ~ income + education, data = Duncan, subset = 
##   rownames(Duncan) != "minister")
## 3: lm(formula = prestige ~ income + education, data = Duncan, subset = 
##   !(rownames(Duncan) %in% c("conductor", "minister")))
## 
##             Model 1 Model 2 Model 3
## (Intercept)   -6.06   -6.63   -6.41
## income        0.599   0.732   0.867
## education     0.546   0.433   0.332

p 300

suppressWarnings(avPlots(mod.duncan, id.n = 3))

dfbs_duncan = as_tibble(data.frame(rownames = row.names(dfbetas(mod.duncan)),dfbetas(mod.duncan)))
dfbs_duncan
## # A tibble: 45 x 4
##    rownames   X.Intercept.     income education
##    <fct>             <dbl>      <dbl>     <dbl>
##  1 accountant   -0.0225     0.000666   0.0359  
##  2 pilot        -0.0254     0.0509    -0.00812 
##  3 architect    -0.00919    0.00648    0.00562 
##  4 author       -0.0000472 -0.0000602  0.000140
##  5 chemist      -0.0658     0.0170     0.0868  
##  6 minister      0.145     -1.22       1.26    
##  7 professor    -0.0763    -0.0137     0.122   
##  8 dentist       0.0819    -0.0463    -0.0529  
##  9 reporter      0.217     -0.102     -0.222   
## 10 engineer     -0.0312     0.0289     0.0159  
## # … with 35 more rows
dfbs_duncan %>% 
  ggplot(aes(x = income, y = education))+
  geom_point()+
  geom_text(data = dfbs_duncan %>% filter(income < -.5 | income > .4),aes(income,education,label=rownames))

ggplotly(dfbs_duncan %>% 
  ggplot(aes(x = income, y = education))+
  geom_point(aes(text = rownames))
)
## Warning: Ignoring unknown aesthetics: text
#manual filter based on graph above to get rownames:
dfbs_duncan %>% 
  filter(education > 1)
## # A tibble: 1 x 4
##   rownames X.Intercept. income education
##   <fct>           <dbl>  <dbl>     <dbl>
## 1 minister        0.145  -1.22      1.26

p304

mod.ornstein = lm(interlocks +1 ~ log(assets) + nation + sector, data = Ornstein)
# par(mfrow=c(1,2))
qqPlot(mod.ornstein, id.n=0)

## [1]  2 44
plot(density(rstudent(mod.ornstein)))

boxCox(mod.ornstein, lambda = seq(0,.6, by=.1))

#max lambda approx .22. 

similar to box Cox above but produces numeric instead of graphical output:

p1 = powerTransform(mod.ornstein)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
summary(p1)
## bcPower Transformation to Normality 
##    Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1    0.2227        0.22        0.126       0.3193
## 
## Likelihood ratio test that transformation parameter is equal to 0
##  (log transformation)
##                            LRT df       pval
## LR test, lambda = (0) 19.75696  1 8.7941e-06
## 
## Likelihood ratio test that no transformation is needed
##                            LRT df       pval
## LR test, lambda = (1) 243.4049  1 < 2.22e-16
# Ornstein1_old = transform(Ornstein,
#                       y1=bcPower(interlocks +1, coef(p1)),
#                       y1round = bcPower(interlocks+1, coef(p1,round=T))
#                       ) #from the book

Ornstein1 = Ornstein %>% 
  mutate(#y1=bcPower(interlocks +1, coef(p1)), #not used. same results as rounded below
         interlocks_transform = bcPower(interlocks+1, lambda = coef(p1,round=T)) 
          )
Ornstein1
##     assets sector nation interlocks interlocks_transform
## 1   147670    BNK    CAN         87            7.6797325
## 2   133000    BNK    CAN        107            8.2475809
## 3   113230    BNK    CAN         94            7.8889363
## 4    85418    BNK    CAN         48            6.1920496
## 5    75477    BNK    CAN         66            6.9628392
## 6    40742    FIN    CAN         69            7.0751001
## 7    40140    TRN    CAN         46            6.0933783
## 8    26866    BNK    CAN         16            3.9486476
## 9    24500    TRN    CAN         77            7.3571781
## 10   23700    MIN     US          6            2.4355835
## 11   23482    FIN    CAN         18            4.1602769
## 12   21512    FIN    CAN         29            5.0864594
## 13   20780    MIN     US         36            5.5443152
## 14   18688    FIN    CAN         20            4.3552382
## 15   18286    BNK    CAN         13            3.5915568
## 16   17910    MIN     US          6            2.4355835
## 17   17784    FIN    CAN         31            5.2250875
## 18   16631    FIN    CAN         27            4.9404501
## 19   16458    FIN    CAN         23            4.6222140
## 20   15280    MIN     US         13            3.5915568
## 21   15140    FIN    CAN         32            5.2918893
## 22   14362    FIN    CAN         28            5.0144334
## 23   14163    AGR    CAN          4            1.9355970
## 24   13820    HLD    CAN         42            5.8858090
## 25   13787    FIN    CAN         17            4.0567484
## 26   12810    TRN    CAN         40            5.7763420
## 27   12080    MIN     US         29            5.0864594
## 28   11250    MIN    CAN         29            5.0864594
## 29   11090    MAN     US         21            4.4473483
## 30   10580    MIN    OTH          3            1.6240827
## 31   10570    MAN    CAN         32            5.2918893
## 32   10568    FIN    CAN         29            5.0864594
## 33   10320    MIN    CAN         40            5.7763420
## 34   10110    TRN     US          5            2.2018668
## 35    9044    WOD    CAN         33            5.3571357
## 36    8395    FIN    CAN         21            4.4473483
## 37    8182    FIN     US         18            4.1602769
## 38    7994    TRN     US         18            4.1602769
## 39    7930    FIN    CAN         13            3.5915568
## 40    7877    MAN     US          2            1.2446483
## 41    7564    FIN     US         22            4.5362592
## 42    7510    FIN     US         13            3.5915568
## 43    7287    FIN     US          2            1.2446483
## 44    7018    BNK    CAN          0            0.0000000
## 45    6629    MIN     US         18            4.1602769
## 46    6571    FIN    CAN          9            3.0081277
## 47    6498    TRN    CAN         31            5.2250875
## 48    6407    AGR    CAN         16            3.9486476
## 49    6286    MIN    CAN         28            5.0144334
## 50    5932    FIN    CAN         34            5.4209070
## 51    5704    MIN    CAN         33            5.3571357
## 52    5479    MER    CAN          7            2.6446249
## 53    5437    TRN     US         12            3.4592772
## 54    5429    MER     US         15            3.8354850
## 55    5366    TRN     US         13            3.5915568
## 56    5035    MIN     US         16            3.9486476
## 57    5021    MIN    OTH         27            4.9404501
## 58    4980    AGR    CAN         12            3.4592772
## 59    4838    TRN    CAN         11            3.3188352
## 60    4634    MIN     US          0            0.0000000
## 61    4592    TRN     US         16            3.9486476
## 62    4390    TRN    CAN         17            4.0567484
## 63    4304    WOD    CAN         55            6.5144598
## 64    4298    AGR    OTH         15            3.8354850
## 65    4227    FIN    CAN         44            5.9913871
## 66    4210    MIN     US         18            4.1602769
## 67    4154    FIN    OTH         20            4.3552382
## 68    4100    MAN     US         19            4.2596528
## 69    4099    MAN     US          9            3.0081277
## 70    4088    FIN    CAN         12            3.4592772
## 71    3960    CON    OTH         17            4.0567484
## 72    3896    AGR    CAN         15            3.8354850
## 73    3879    WOD    CAN         27            4.9404501
## 74    3673    MER    CAN         30            5.1566426
## 75    3654    MIN    OTH         27            4.9404501
## 76    3631    TRN     US         12            3.4592772
## 77    3606    MIN     UK         11            3.3188352
## 78    3570    MER    CAN         28            5.0144334
## 79    3561    MIN     US          8            2.8342429
## 80    3274    TRN    CAN         12            3.4592772
## 81    3152    MAN     US         18            4.1602769
## 82    3058    WOD     UK         23            4.6222140
## 83    2958    WOD    CAN         51            6.3343437
## 84    2927    MIN    OTH         35            5.4832774
## 85    2878    MER    CAN          8            2.8342429
## 86    2814    AGR    CAN         43            5.9390644
## 87    2807    MIN    CAN          4            1.9355970
## 88    2801    WOD    CAN         18            4.1602769
## 89    2786    AGR    OTH         20            4.3552382
## 90    2757    MIN     UK         13            3.5915568
## 91    2667    MIN     UK         22            4.5362592
## 92    2625    AGR     UK         10            3.1689789
## 93    2566    MER     US          1            0.7494995
## 94    2549    HLD     US          6            2.4355835
## 95    2488    AGR    CAN         30            5.1566426
## 96    2285    MIN     US          6            2.4355835
## 97    2281    WOD     US         11            3.3188352
## 98    2182    AGR     US         20            4.3552382
## 99    2165    MIN     US          7            2.6446249
## 100   2164    MIN    OTH         13            3.5915568
## 101   2141    MIN     US          0            0.0000000
## 102   2108    MAN     UK         14            3.7166837
## 103   2086    MIN     US         19            4.2596528
## 104   2025    AGR    CAN          4            1.9355970
## 105   1881    AGR    CAN          2            1.2446483
## 106   1876    MER    CAN          2            1.2446483
## 107   1841    WOD     US          7            2.6446249
## 108   1656    MIN    OTH         25            4.7860944
## 109   1655    MER    CAN         29            5.0864594
## 110   1612    MER    CAN          5            2.2018668
## 111   1603    MIN     US         12            3.4592772
## 112   1601    WOD    CAN         25            4.7860944
## 113   1591    AGR     US          2            1.2446483
## 114   1583    HLD    CAN         25            4.7860944
## 115   1561    MIN     US          2            1.2446483
## 116   1520    MAN     US         16            3.9486476
## 117   1511    WOD     US          3            1.6240827
## 118   1487    MIN    OTH          9            3.0081277
## 119   1482    MAN     US          1            0.7494995
## 120   1477    MAN     US          0            0.0000000
## 121   1469    MIN     US          1            0.7494995
## 122   1434    MIN     US          1            0.7494995
## 123   1427    HLD    CAN          1            0.7494995
## 124   1416    MER    CAN          6            2.4355835
## 125   1378    MIN     US         12            3.4592772
## 126   1372    MIN     US          5            2.2018668
## 127   1343    WOD     UK          5            2.2018668
## 128   1337    MIN     US          0            0.0000000
## 129   1335    MAN    CAN          4            1.9355970
## 130   1315    MIN    OTH          5            2.2018668
## 131   1235    MAN    CAN         33            5.3571357
## 132   1172    AGR    CAN         11            3.3188352
## 133   1154    MAN     US          3            1.6240827
## 134   1154    HLD    CAN          3            1.6240827
## 135   1112    AGR     US          5            2.2018668
## 136   1060    AGR    CAN         25            4.7860944
## 137   1027    MIN    OTH         14            3.7166837
## 138    984    MER     US          1            0.7494995
## 139    978    MAN     US          0            0.0000000
## 140    953    MIN     US         12            3.4592772
## 141    950    WOD    CAN         18            4.1602769
## 142    943    MER     US         11            3.3188352
## 143    904    MIN    CAN         39            5.7200444
## 144    898    AGR     UK          3            1.6240827
## 145    888    WOD    CAN          2            1.2446483
## 146    848    WOD    CAN          8            2.8342429
## 147    844    MAN     US          0            0.0000000
## 148    839    TRN    CAN         11            3.3188352
## 149    832    AGR    CAN         13            3.5915568
## 150    830    MIN     UK          1            0.7494995
## 151    816    MIN    CAN         10            3.1689789
## 152    809    MAN     UK          0            0.0000000
## 153    802    MAN    CAN          0            0.0000000
## 154    798    AGR    CAN         11            3.3188352
## 155    789    MIN     UK          9            3.0081277
## 156    789    MAN     US          6            2.4355835
## 157    782    MER    CAN         11            3.3188352
## 158    780    MAN     US          1            0.7494995
## 159    779    MER    CAN         14            3.7166837
## 160    761    WOD     US          1            0.7494995
## 161    751    AGR    CAN          8            2.8342429
## 162    742    AGR    CAN          7            2.6446249
## 163    727    AGR    CAN          1            0.7494995
## 164    707    AGR     US          9            3.0081277
## 165    704    HLD    CAN         10            3.1689789
## 166    702    MAN    CAN          3            1.6240827
## 167    690    WOD    OTH          0            0.0000000
## 168    677    MAN    CAN         12            3.4592772
## 169    638    MAN     US          6            2.4355835
## 170    637    AGR     US          1            0.7494995
## 171    636    MIN     US          0            0.0000000
## 172    614    CON    CAN          2            1.2446483
## 173    590    MAN     US          2            1.2446483
## 174    589    MIN    OTH         23            4.6222140
## 175    586    TRN    CAN         10            3.1689789
## 176    575    AGR    CAN          1            0.7494995
## 177    566    AGR     US          0            0.0000000
## 178    559    MAN     US          7            2.6446249
## 179    558    AGR    CAN         14            3.7166837
## 180    552    AGR    CAN          7            2.6446249
## 181    548    MIN     US          5            2.2018668
## 182    540    MAN    CAN          6            2.4355835
## 183    539    AGR    CAN          9            3.0081277
## 184    533    TRN    CAN          5            2.2018668
## 185    523    MAN     US          8            2.8342429
## 186    519    MAN     US          8            2.8342429
## 187    519    AGR    CAN          0            0.0000000
## 188    516    TRN    CAN          5            2.2018668
## 189    511    HLD    CAN          0            0.0000000
## 190    510    MAN    CAN         11            3.3188352
## 191    508    MAN    OTH          1            0.7494995
## 192    497    AGR    CAN          4            1.9355970
## 193    495    MAN    CAN          0            0.0000000
## 194    494    MER    CAN          8            2.8342429
## 195    488    MAN    CAN          1            0.7494995
## 196    487    MAN    CAN          8            2.8342429
## 197    471    MER     US          0            0.0000000
## 198    456    MIN     US          5            2.2018668
## 199    456    MAN     US          0            0.0000000
## 200    444    AGR     US          1            0.7494995
## 201    438    MAN     UK         18            4.1602769
## 202    432    MAN     US          1            0.7494995
## 203    432    MAN    CAN          3            1.6240827
## 204    422    WOD     US         11            3.3188352
## 205    407    MIN     US          6            2.4355835
## 206    402    MAN     US          0            0.0000000
## 207    391    AGR    CAN         28            5.0144334
## 208    387    MER    CAN         11            3.3188352
## 209    386    CON    OTH          2            1.2446483
## 210    379    MAN     US         16            3.9486476
## 211    376    MER    CAN          5            2.2018668
## 212    375    MAN     US          8            2.8342429
## 213    372    AGR     US          8            2.8342429
## 214    370    AGR     UK          3            1.6240827
## 215    364    TRN     US          5            2.2018668
## 216    361    MIN     US          2            1.2446483
## 217    359    AGR     US          0            0.0000000
## 218    358    AGR     US          0            0.0000000
## 219    352    WOD    CAN         21            4.4473483
## 220    350    MAN     US          1            0.7494995
## 221    345    MER     US          8            2.8342429
## 222    332    MAN     US          0            0.0000000
## 223    326    AGR    CAN          3            1.6240827
## 224    326    AGR     US          0            0.0000000
## 225    326    MAN    CAN         28            5.0144334
## 226    325    MAN    OTH          0            0.0000000
## 227    318    AGR    CAN          2            1.2446483
## 228    312    MAN     US          2            1.2446483
## 229    305    AGR     UK          4            1.9355970
## 230    304    MER     US          4            1.9355970
## 231    303    WOD     UK          3            1.6240827
## 232    297    CON    CAN          2            1.2446483
## 233    276    MIN    CAN          9            3.0081277
## 234    270    MAN    CAN          4            1.9355970
## 235    261    CON     UK          1            0.7494995
## 236    256    MAN     US          1            0.7494995
## 237    245    MIN    CAN         11            3.3188352
## 238    241    MIN     US          3            1.6240827
## 239    225    AGR     US          6            2.4355835
## 240    225    MIN     UK          8            2.8342429
## 241    220    AGR     US          0            0.0000000
## 242    201    AGR     US          5            2.2018668
## 243    200    MAN     US          0            0.0000000
## 244    188    MAN     US          0            0.0000000
## 245    160    AGR    CAN          0            0.0000000
## 246    158    AGR    CAN          5            2.2018668
## 247    119    AGR    CAN          6            2.4355835
## 248     62    MIN     US          0            0.0000000
# summary(Ornstein1)
mod.ornstein.trans = update(mod.ornstein, interlocks_transform ~., data = Ornstein1)

qqPlot(mod.ornstein.trans, id.n=0)

## [1] 44 60
plot(density(rstudent(mod.ornstein.trans)))

constructed variable plot p 307

mod.orstein.constr_var = update(mod.ornstein, .~. +boxCoxVariable(interlocks+1))
summary(mod.orstein.constr_var) #below is from book, but this is easier code and you just find that variable in table
## 
## Call:
## lm(formula = interlocks + 1 ~ log(assets) + nation + sector + 
##     boxCoxVariable(interlocks + 1), data = Ornstein)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.9153  -3.9372   0.4998   4.1919  13.1209 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -4.39627    2.69620  -1.631 0.104338    
## log(assets)                      2.71779    0.37400   7.267 5.51e-12 ***
## nationOTH                       -0.60586    1.59413  -0.380 0.704251    
## nationUK                        -2.01746    1.58843  -1.270 0.205317    
## nationUS                        -4.75160    0.89731  -5.295 2.74e-07 ***
## sectorBNK                      -10.60540    2.88882  -3.671 0.000299 ***
## sectorCON                       -3.19282    2.79579  -1.142 0.254621    
## sectorFIN                        1.41405    1.76186   0.803 0.423029    
## sectorHLD                       -1.71708    2.37789  -0.722 0.470956    
## sectorMAN                       -0.04559    1.22084  -0.037 0.970241    
## sectorMER                        0.78174    1.56550   0.499 0.618002    
## sectorMIN                        1.98125    1.26012   1.572 0.117246    
## sectorTRN                        1.66995    1.70212   0.981 0.327560    
## sectorWOD                        2.67821    1.60017   1.674 0.095531 .  
## boxCoxVariable(interlocks + 1)   0.61615    0.02421  25.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.806 on 233 degrees of freedom
## Multiple R-squared:  0.8771, Adjusted R-squared:  0.8697 
## F-statistic: 118.7 on 14 and 233 DF,  p-value: < 2.2e-16
# summary(mod.orstein.constr_var)$coef["boxCoxVariable(interlocks + 1)",]#drop = F]
# avPlots(mod.orstein.constr_var) #have to hit enter in console but produces all vars
avPlots(mod.orstein.constr_var, variable = "boxCoxVariable(interlocks + 1)")
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored

transforming predictors compononet-plus-residual plots (or partial-residual-plots)

prestige.mod.3 = update(prestige.mod.2, ~. - type + women)
crPlots(prestige.mod.3,order=2) #book says this is nearly identical to order=1 and CERES plots

crPlots(prestige.mod.3,order=1)

prestige.mod.4 = update(prestige.mod.3, .~. +log2(income) - income)
crPlots(prestige.mod.4,order=2)

prestige.mod.5 = update(prestige.mod.4, .~. - women + poly(women,2))
crPlots(prestige.mod.5,order=1)

summary(prestige.mod.4)$coef
##                   Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept)  -110.96582409 14.84292810 -7.476006 3.269774e-11
## education       3.73050783  0.35438304 10.526767 8.730063e-18
## women           0.04689514  0.02989886  1.568459 1.199974e-01
## log2(income)    9.31466643  1.32651512  7.021907 2.895566e-10
summary(prestige.mod.5)$coef #why does poly have to coefs?
##                    Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)     -110.599683 13.9817384 -7.910296 4.159978e-12
## education          3.770068  0.3474857 10.849564 1.984736e-18
## log2(income)       9.360129  1.2992237  7.204402 1.261807e-10
## poly(women, 2)1   15.088276  9.3356690  1.616197 1.092997e-01
## poly(women, 2)2   15.871413  6.9704389  2.276960 2.498556e-02

p 314

residualPlots(mod.ornstein, ~1 ,fitted = T, id.n=0,quadratic=F,tests=F)
## Warning in plot.window(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter
## Warning in box(...): "id.n" is not a graphical parameter
## Warning in title(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
## graphical parameter

spreadLevelPlot(mod.ornstein)
## Warning in spreadLevelPlot.lm(mod.ornstein): 
## 16 negative fitted values removed

## 
## Suggested power transformation:  0.5540406

p 315 collinearity

data(Ericksen)
Ericksen
##                 minority crime poverty language highschool housing  city
## Alabama             26.1    49    18.9      0.2       43.5     7.6 state
## Alaska               5.7    62    10.7      1.7       17.5    23.6 state
## Arizona             18.9    81    13.2      3.2       27.6     8.1 state
## Arkansas            16.9    38    19.0      0.2       44.5     7.0 state
## California.R        24.3    73    10.4      5.0       26.0    11.8 state
## Colorado            15.2    73    10.1      1.2       21.4     9.2 state
## Connecticut         10.8    58     8.0      2.4       29.7    21.0 state
## Delaware            17.5    68    11.8      0.7       31.4     8.9 state
## Florida             22.3    81    13.4      3.6       33.3    10.1 state
## Georgia             27.6    55    16.6      0.3       43.6    10.2 state
## Hawaii               9.1    75     9.9      5.7       26.2    17.0 state
## Idaho                4.2    48    12.6      1.0       26.3     9.1 state
## Illinois.R           8.1    48     7.7      1.0       29.8    13.5 state
## Indiana.R            7.1    48     9.4      0.5       33.6     9.9 state
## Iowa                 2.3    47    10.1      0.3       28.5    10.4 state
## Kansas               7.9    54    10.1      0.5       26.7     8.5 state
## Kentucky             7.7    34    17.6      0.2       46.9    10.6 state
## Louisiana           31.4    54    18.6      1.1       42.3     9.7 state
## Maine                0.7    44    13.0      1.0       31.3    19.5 state
## Maryland.R          16.7    58     6.8      0.8       28.2    10.5 state
## Massachusetts.R      3.8    53     8.5      2.1       27.4    26.9 state
## Michigan.R           7.0    61     8.7      0.7       29.9     9.4 state
## Minnesota            2.1    48     9.5      0.5       26.9    10.7 state
## Mississippi         35.8    34    23.9      0.2       45.2     7.2 state
## Missouri.R           7.8    45    11.2      0.3       34.9     9.1 state
## Montana              1.5    50    12.3      0.4       25.6    12.8 state
## Nebraska             4.8    43    10.7      0.5       26.6     9.7 state
## Nevada              13.0    88     8.7      1.6       24.5    11.7 state
## New.Hampshire        1.0    47     8.5      0.8       27.7    20.3 state
## New.Jersey          19.0    64     9.5      3.6       32.6    23.7 state
## New.Mexico          38.4    59    17.6      4.6       31.1    10.7 state
## New.York.R           8.0    48     8.9      1.3       29.3    21.6 state
## North.Carolina      23.1    46    14.8      0.2       45.2     8.2 state
## North.Dakota         1.0    30    12.6      0.5       33.6    15.1 state
## Ohio.R               8.9    52     9.6      0.5       32.1    11.3 state
## Oklahoma             8.6    50    13.4      0.5       34.0     8.0 state
## Oregon               3.9    60    10.7      0.8       24.4     7.9 state
## Pennsylvania.R       4.8    33     8.8      0.6       33.6    13.3 state
## Rhode.Island         4.9    59    10.3      3.2       38.9    29.6 state
## South.Carolina      31.0    53    16.6      0.2       46.3     7.9 state
## South.Dakota         0.9    32    16.9      0.4       32.1    12.0 state
## Tennessee           16.4    44    16.4      0.2       43.8     9.4 state
## Texas.R             30.6    55    15.0      4.7       38.7     7.7 state
## Utah                 4.7    58    10.3      0.9       20.0    11.3 state
## Vermont              0.9    50    12.1      0.5       29.0    20.8 state
## Virginia            20.0    46    11.8      0.5       37.6    10.3 state
## Washington           5.4    69     9.8      1.0       22.4     9.4 state
## West.Virginia        3.9    25    15.0      0.2       44.0     9.0 state
## Wisconsin.R          1.7    45     7.9      0.4       29.5    12.8 state
## Wyoming              5.9    49     7.9      0.7       22.1    13.2 state
## Baltimore           55.5   100    22.9      0.7       51.6    23.3  city
## Boston              28.4   135    20.2      4.4       31.6    52.1  city
## Chicago             53.7    66    20.3      6.7       43.8    51.4  city
## Cleveland           46.7   101    22.1      1.6       49.1    36.4  city
## Dallas              41.6   118    14.2      3.1       31.5    12.9  city
## Detroit             65.4   106    21.9      1.6       45.8    18.6  city
## Houston             45.1    80    12.7      5.1       31.6     8.9  city
## Indianapolis        22.5    53    11.5      0.3       33.3    13.6  city
## Los.Angeles         44.4   100    16.4     12.7       31.4    15.0  city
## Milwaukee           27.2    65    13.8      1.6       46.4    27.2  city
## New.York.City       44.0   101    20.0      8.9       39.8    32.2  city
## Philadelphia        41.3    60    20.6      2.2       45.7    21.7  city
## Saint.Louis         46.7   143    21.8      0.5       51.8    40.9  city
## San.Diego           23.6    81    12.4      4.2       21.1    11.2  city
## San.Francisco       24.8   107    13.7      9.2       26.0    20.3  city
## Washington.DC       72.6   102    18.6      1.1       32.9    21.0  city
##                 conventional undercount
## Alabama                    0      -0.04
## Alaska                   100       3.35
## Arizona                   18       2.48
## Arkansas                   0      -0.74
## California.R               4       3.60
## Colorado                  19       1.34
## Connecticut                0      -0.26
## Delaware                   0      -0.16
## Florida                    0       2.20
## Georgia                    0       0.37
## Hawaii                    29       1.46
## Idaho                     56       1.53
## Illinois.R                 0       1.69
## Indiana.R                  0      -0.68
## Iowa                       0      -0.59
## Kansas                    14       0.94
## Kentucky                   0      -1.41
## Louisiana                  0       2.46
## Maine                     40       2.06
## Maryland.R                 0       2.03
## Massachusetts.R            4      -0.57
## Michigan.R                 8       0.89
## Minnesota                 11       1.57
## Mississippi                0       1.52
## Missouri.R                 0       0.81
## Montana                   75       1.81
## Nebraska                  33       0.36
## Nevada                    10       5.08
## New.Hampshire              0      -1.49
## New.Jersey                 0       1.44
## New.Mexico                58       2.69
## New.York.R                 0      -1.48
## North.Carolina             0       1.36
## North.Dakota              70       0.35
## Ohio.R                     0       0.97
## Oklahoma                   0      -0.12
## Oregon                    13       0.93
## Pennsylvania.R             0      -0.78
## Rhode.Island               0       0.74
## South.Carolina             0       6.19
## South.Dakota              84       0.42
## Tennessee                  0      -2.31
## Texas.R                    1       0.27
## Utah                      14       1.14
## Vermont                    0      -1.12
## Virginia                   0       1.11
## Washington                 4       1.48
## West.Virginia              0      -0.69
## Wisconsin.R                9       1.45
## Wyoming                  100       4.01
## Baltimore                  0       6.15
## Boston                     0       2.27
## Chicago                    0       5.42
## Cleveland                  0       5.01
## Dallas                     0       8.18
## Detroit                    0       4.33
## Houston                    0       5.79
## Indianapolis               0       0.31
## Los.Angeles                0       7.52
## Milwaukee                  0       3.17
## New.York.City              0       7.39
## Philadelphia               0       6.41
## Saint.Louis                0       3.60
## San.Diego                  0       0.47
## San.Francisco              0       5.18
## Washington.DC              0       5.93
model_census = lm(undercount ~ . ,data=Ericksen)
summary(model_census)
## 
## Call:
## lm(formula = undercount ~ ., data = Ericksen)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8356 -0.8033 -0.0553  0.7050  4.2467 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.611411   1.720843  -0.355 0.723678    
## minority      0.079834   0.022609   3.531 0.000827 ***
## crime         0.030117   0.012998   2.317 0.024115 *  
## poverty      -0.178369   0.084916  -2.101 0.040117 *  
## language      0.215125   0.092209   2.333 0.023200 *  
## highschool    0.061290   0.044775   1.369 0.176415    
## housing      -0.034957   0.024630  -1.419 0.161259    
## citystate    -1.159982   0.770644  -1.505 0.137791    
## conventional  0.036989   0.009253   3.997 0.000186 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.426 on 57 degrees of freedom
## Multiple R-squared:  0.7077, Adjusted R-squared:  0.6667 
## F-statistic: 17.25 on 8 and 57 DF,  p-value: 1.044e-12
model_census_2 = lm(undercount ~ . -highschool - poverty - housing - city,data=Ericksen)
summary(model_census_2)
## 
## Call:
## lm(formula = undercount ~ . - highschool - poverty - housing - 
##     city, data = Ericksen)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8038 -0.9149  0.1019  0.7776  4.3541 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.975947   0.550062  -3.592 0.000655 ***
## minority      0.075188   0.014259   5.273 1.86e-06 ***
## crime         0.027154   0.010391   2.613 0.011280 *  
## language      0.209353   0.086737   2.414 0.018811 *  
## conventional  0.027296   0.007771   3.512 0.000842 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.467 on 61 degrees of freedom
## Multiple R-squared:  0.6691, Adjusted R-squared:  0.6475 
## F-statistic: 30.84 on 4 and 61 DF,  p-value: 4.781e-14

Check for collinearity (vif > 4)

vif(model_census) #can also be called on glms
##     minority        crime      poverty     language   highschool 
##     5.009065     3.343586     4.625178     1.635568     4.619169 
##      housing         city conventional 
##     1.871745     3.537750     1.691320
Ericksen %>% pull(undercount) %>% hist

model_census_glm = glm(undercount ~ . , data = Ericksen)
model_census_glm #same as lm() above since family not specified
## 
## Call:  glm(formula = undercount ~ ., data = Ericksen)
## 
## Coefficients:
##  (Intercept)      minority         crime       poverty      language  
##     -0.61141       0.07983       0.03012      -0.17837       0.21512  
##   highschool       housing     citystate  conventional  
##      0.06129      -0.03496      -1.15998       0.03699  
## 
## Degrees of Freedom: 65 Total (i.e. Null);  57 Residual
## Null Deviance:       396.8 
## Residual Deviance: 116   AIC: 244.5
vif(model_census_glm)
##     minority        crime      poverty     language   highschool 
##     5.009065     3.343586     4.625178     1.635568     4.619169 
##      housing         city conventional 
##     1.871745     3.537750     1.691320